;; DFA scheduling description of the Synopsys DesignWare ARC700 cpu
;; for GNU C compiler
;;    Comments and Support For ARC700 instructions added by
;;    Saurabh Verma (saurabh.verma@codito.com)
;;    Ramana Radhakrishnan(ramana.radhakrishnan@codito.com)
;;    Factoring out and improvement of ARC700 Scheduling by
;;    Joern Rennecke (joern.rennecke@embecosm.com)
;; Copyright (C) 2006-2025 Free Software Foundation, Inc.

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

; Declare the automaton that all cpu units below are attached to.
(define_automaton "ARC700")

;; aux to be added here
; Units that the insn reservations in this file reserve cycle by cycle.
; NOTE(review): simd_unit is declared here but no reservation visible in
; this file refers to it -- confirm whether it is used elsewhere.
(define_cpu_unit "core, dmp,  write_port, dmp_write_port, multiplier, issue, blockage, simd_unit" "ARC700")

; unary / move / cmove / binary insns whose operand 0 has DImode.
; Default latency 2.  issue and core are reserved in the first two cycles;
; write_port is reserved in the second and third cycle.
; NOTE(review): "core_insn" and "cmove" below also match these insn types;
; this entry presumably relies on being listed first -- confirm.
(define_insn_reservation "core_insn_DI" 2
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "unary, move, cmove, binary")
       (match_operand:DI 0 "" ""))
  "issue+core, issue+core+write_port, write_port")

; Insns of type "lr".  Default latency 2.
; Cycle 1 reserves issue and blockage, the next two cycles reserve blockage
; alone, and the final cycle reserves write_port.
(define_insn_reservation "lr" 2
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "lr"))
  "issue+blockage, blockage*2, write_port")

; Insns of type "sr".  Default latency 1.
; Cycle 1 reserves issue, dmp_write_port and blockage together; blockage
; then stays reserved for nine further cycles.
(define_insn_reservation "sr" 1
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "sr"))
  "issue+dmp_write_port+blockage, blockage*9")

; Single-cycle core insns: types unary, move, binary, add, sub and bxor.
; Default latency 1.  Cycle 1 reserves issue and core, cycle 2 reserves no
; unit, cycle 3 reserves write_port.
(define_insn_reservation "core_insn" 1
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "unary, move, binary, add, sub, bxor"))
  "issue+core, nothing, write_port")

; Insns of type "cmove".  Default latency 1, same unit usage as "core_insn".
; Kept as a separate reservation so that a bypass can name it on its own.
(define_insn_reservation "cmove" 1
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "cmove"))
  "issue+core, nothing, write_port")

; Insns of type "cc_arith".  Default latency 1.
; Cycle 1 reserves issue and core, cycle 2 reserves no unit, cycle 3
; reserves write_port.
(define_insn_reservation "cc_arith" 1
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "cc_arith"))
  "issue+core, nothing, write_port")

; Insns of type "two_cycle_core".  Default latency 2.
; Unit usage is the same as for "core_insn"; only the latency differs.
(define_insn_reservation "two_cycle_core_insn" 2
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "two_cycle_core"))
  "issue+core, nothing, write_port")

; Insns of type "divaw".  Default latency 2; a bypass elsewhere in this
; file shortens the divaw -> divaw case.
; Cycle 1 reserves issue and core, cycle 2 reserves no unit, cycle 3
; reserves write_port.
(define_insn_reservation "divaw_insn" 2
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "divaw"))
  "issue+core, nothing, write_port")

; Insns of type "shift".  Default latency 2.
; Cycle 1 reserves issue and core, cycle 2 reserves no unit, cycle 3
; reserves write_port.
(define_insn_reservation "shift_insn" 2
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "shift"))
  "issue+core, nothing, write_port")

; Insns of type "compare".
; Latency from flag setters to arithmetic with carry is 3, hence the
; default latency of 3; bypasses elsewhere in this file give shorter
; latencies to specific consumers.
(define_insn_reservation "compare_700" 3
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "compare"))
  "issue+core, nothing, write_port")

; Branch insns.  Default latency 1.
; Assume here the branch is predicted correctly and has a delay slot insn
; or is properly unaligned.
; The condition must select type "branch", not "compare": "compare_700" is
; listed earlier and already claims every insn of type "compare", so a
; second reservation with that condition would never be chosen and the
; compare_700 -> branch_700 bypass would never apply.
; NOTE(review): if the port defines further branch-like "type" values,
; check whether they should be listed here as well.
(define_insn_reservation "branch_700" 1
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "branch"))
  "issue+core, nothing, write_port")

; Insns of type "multi" whose operand 0 has DImode.  Default latency 10.
; The reservation takes issue and multiplier twice (each time followed by
; two more multiplier cycles) and reserves write_port twice.
; TODO: is this correct ??
(define_insn_reservation "multi_DI" 10
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "multi")
       (match_operand:DI 0 "" ""))
  "issue+multiplier, multiplier*2,issue+multiplier, multiplier*2,
   nothing,write_port,nothing*2, write_port")

; Insns of type "umulti" whose operand 0 has DImode.  Default latency 9.
; The reservation takes issue and multiplier twice (followed by one, then
; two, further multiplier cycles) and reserves write_port twice, with
; three idle cycles between the two write_port reservations.
(define_insn_reservation "umulti_DI" 9
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "umulti")
       (match_operand:DI 0 "" ""))
  "issue+multiplier, multiplier,issue+multiplier, multiplier*2,
   write_port,nothing*3, write_port")

; Insns of type "umulti" when tuning for the xmac multiplier.
; Default latency 5.  Selected by the same "tune" value as "multi_xmac" so
; that it does not also claim the insns meant for "umulti_std".
(define_insn_reservation "umulti_xmac" 5
  (and (eq_attr "tune" "arc700_4_2_xmac")
       (eq_attr "type" "umulti"))
  "issue+multiplier, multiplier, nothing*3, write_port")

; Insns of type "umulti" when tuning for the standard multiplier.
; Default latency 6.  Selected by the same "tune" value as "multi_std".
; latency of mpyu is lower than mpy / mpyh / mpyhu
(define_insn_reservation "umulti_std" 6
  (and (eq_attr "tune" "arc700_4_2_std")
       (eq_attr "type" "umulti"))
  "issue+multiplier, multiplier*3, nothing*2, write_port")

;; arc700 xmac multiplier
; Insns of type "multi" when the "tune" attribute is "arc700_4_2_xmac".
; Default latency 5.  issue and multiplier in cycle 1, multiplier alone in
; cycle 2, three idle cycles, then write_port.
(define_insn_reservation "multi_xmac" 5
  (and (eq_attr "tune" "arc700_4_2_xmac")
       (eq_attr "type" "multi"))
  "issue+multiplier,multiplier,nothing*3,write_port")

; arc700 standard multiplier
; Insns of type "multi" when the "tune" attribute is "arc700_4_2_std".
; Default latency 7.  issue and multiplier in cycle 1, multiplier alone for
; four more cycles, two idle cycles, then write_port.
(define_insn_reservation "multi_std" 7
  (and (eq_attr "tune" "arc700_4_2_std")
       (eq_attr "type" "multi"))
  "issue+multiplier,multiplier*4,nothing*2,write_port")

;(define_insn_reservation "multi_SI" 7
;       (eq_attr "type" "multi")
;  "issue+multiplier, multiplier*2, nothing*4, write_port")

; There is no multiplier -> multiplier bypass except for the
; mac -> mac dependency on the accumulator.

; divaw -> divaw latency is 1 cycle
; (overrides the default latency of "divaw_insn" for this producer/consumer
; pair only).
(define_bypass 1 "divaw_insn" "divaw_insn")

; A result of "compare_700" is available after 1 cycle, instead of the
; default latency, to insns in the reservations listed here.
(define_bypass 1 "compare_700" "branch_700,core_insn,data_store,data_load")

; We could schedule the cmove immediately after the compare, but then
; the cmove would have higher latency... so just keep the cmove apart
; from the compare by giving this pair a latency of 2.
(define_bypass 2 "compare_700" "cmove")

; no functional unit runs when blockage is reserved:
; blockage may not be reserved in the same cycle as core or multiplier.
(exclusion_set "blockage" "core, multiplier")

; Insns of type "load".  Default latency 3.
; Cycle 1 reserves issue and dmp, cycle 2 reserves no unit, cycle 3
; reserves dmp_write_port.
(define_insn_reservation "data_load" 3
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "load"))
  "issue+dmp, nothing, dmp_write_port")

; Insns of type "store".  Default latency 1.
; A single cycle that reserves issue and dmp_write_port together.
(define_insn_reservation "data_store" 1
  (and (eq_attr "tune_arc700" "true")
       (eq_attr "type" "store"))
  "issue+dmp_write_port")

; Store -> load latency is 3 when the guard arc_store_addr_hazard_p accepts
; the insn pair; otherwise the default latency of "data_store" applies.
; NOTE(review): the guard is defined outside this file; presumably it
; detects a load that conflicts with the address of the store -- confirm.
(define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p")
